Kernel Author:
Bhishan Poudel, Ph.D Contd. Astrophysics

Update: Jan 17, 2020 Fri
Date    : Jan 13, 2020

Introduction

Update

  1. Looked at gm0 vs gc0 (and gm1 vs gc1) 45 degree line and removed outliers.
  2. Find the weights for g_sq for given magnitude bins using smooth fitting curve.

Usual Filtering

df = df.query('calib_psfCandidate == 0.0')
df = df.query('deblend_nChild == 0.0')
df['ellip'] = np.hypot( df['ext_shapeHSM_HsmShapeRegauss_e1'] ,
                        df['ext_shapeHSM_HsmShapeRegauss_e2'] )
df = df.query('ellip < 2.0') # it was 1.5 before

#select only few columns after filtering:
cols_select = ['base_SdssCentroid_x', 'base_SdssCentroid_y',
                'base_SdssCentroid_xSigma','base_SdssCentroid_ySigma',
                'ext_shapeHSM_HsmShapeRegauss_e1','ext_shapeHSM_HsmShapeRegauss_e2',
                'base_SdssShape_flux']
df = df[cols_select]        

# drop all nans
df = df.dropna()

# additional columns
df['radius'] =  df.eval(""" ( (ext_shapeHSM_HsmSourceMoments_xx *  ext_shapeHSM_HsmSourceMoments_yy) \
                                          -  (ext_shapeHSM_HsmSourceMoments_xy**2 ) )**0.25 """)

Shape filtering
https://github.com/LSSTDESC/DC2-analysis/blob/master/tutorials/object_gcr_2_lensing_cuts.ipynb

df = df.query('ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3')
df = df.query('ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4')
df = df.query('ext_shapeHSM_HsmShapeRegauss_flag== 0.0')

Filter strongly lensed objects

  • Take the objects with centroids >154 pixels (remove strong lens objects).
    # exclude strong lens objects <=154 distance
    # The shape of lsst.fits file is 3998,3998 and center is 1699,1699.
    df['x_center'] = 1699
    df['y_center'] = 1699
    df['distance'] = ( (df['x[0]'] - df['x_center'])**2 + (df['x[1]'] - df['y_center'])**2 )**0.5
    df = df[df.distance > 154]
    

Imcat script

# create new columns and cleaning (four files)
lc -C -n fN -n id -N '1 2 x' -N '1 2 errx' -N '1 2 g' -n ellip -n flux -n radius < "${M9T}".txt  |  lc +all 'mag = %flux log10 -2.5 *'  |  cleancat 15  |  lc +all -r 'mag' > "${M9C}".cat


# merge 4 catalogs
mergecats 5 "${MC}".cat "${M9C}".cat "${LC}".cat "${L9C}".cat > ${catalogs}/merge.cat &&


lc -b +all 
'x = %x[0][0] %x[1][0] + %x[2][0] + %x[3][0] + 4 / %x[0][1] %x[1][1] + %x[2][1] + %x[3][1] + 4 / 2 vector'
'gm = %g[0][0] %g[1][0] + 2 / %g[0][1] %g[1][1] + 2 / 2 vector' 
'gc = %g[2][0] %g[3][0] + 2 / %g[2][1] %g[3][1] + 2 / 2 vector'   
'gmd = %g[0][0] %g[1][0] - 2 / %g[0][1] %g[1][1] - 2 / 2 vector' 
'gcd = %g[2][0] %g[3][0] - 2 / %g[2][1] %g[3][1] - 2 / 2 vector' 
< ${catalogs}/merge.cat > ${final}/final_${i}.cat

Notes

final_text.txt is created by imcat program after merging four lsst files (m,m9,l,l9) after cleaning.

Imports

In [1]:
import json, os,sys
import numpy as np
import pandas as pd
import seaborn as sns
import time
sns.set(color_codes=True)

import plotly
import ipywidgets

pd.set_option('display.max_columns',200)
time_start_notebook = time.time()

import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

print([(x.__name__, x.__version__) for x in [np,pd,sns,plotly,ipywidgets]])
[('numpy', '1.16.4'), ('pandas', '0.25.3'), ('seaborn', '0.9.0'), ('plotly', '4.4.1'), ('ipywidgets', '7.4.2')]
In [2]:
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;

Load the final text cleancat15 data

g_sq = g00 g00 + g10 g10
gmd_sq = gmd0**2 + gmd1**2
In [3]:
!head -2 ../data/cleancat/final_text_cleancat15_100.txt
head: ../data/cleancat/final_text_cleancat15_100.txt: No such file or directory
In [4]:
names = "fN[0][0]       fN[1][0]       fN[2][0]       fN[3][0]       id[0][0]       id[1][0]       id[2][0]       id[3][0]           x[0]           x[1]     errx[0][0]     errx[0][1]     errx[1][0]     errx[1][1]     errx[2][0]     errx[2][1]     errx[3][0]     errx[3][1]        g[0][0]        g[0][1]        g[1][0]        g[1][1]        g[2][0]        g[2][1]        g[3][0]        g[3][1]    ellip[0][0]    ellip[1][0]    ellip[2][0]    ellip[3][0]     flux[0][0]     flux[1][0]     flux[2][0]     flux[3][0]   radius[0][0]   radius[1][0]   radius[2][0]   radius[3][0]      mag[0][0]      mag[1][0]      mag[2][0]      mag[3][0]          gm[0]          gm[1]          gc[0]          gc[1]         gmd[0]         gmd[1]         gcd[0]         gcd[1]"
print(names)
fN[0][0]       fN[1][0]       fN[2][0]       fN[3][0]       id[0][0]       id[1][0]       id[2][0]       id[3][0]           x[0]           x[1]     errx[0][0]     errx[0][1]     errx[1][0]     errx[1][1]     errx[2][0]     errx[2][1]     errx[3][0]     errx[3][1]        g[0][0]        g[0][1]        g[1][0]        g[1][1]        g[2][0]        g[2][1]        g[3][0]        g[3][1]    ellip[0][0]    ellip[1][0]    ellip[2][0]    ellip[3][0]     flux[0][0]     flux[1][0]     flux[2][0]     flux[3][0]   radius[0][0]   radius[1][0]   radius[2][0]   radius[3][0]      mag[0][0]      mag[1][0]      mag[2][0]      mag[3][0]          gm[0]          gm[1]          gc[0]          gc[1]         gmd[0]         gmd[1]         gcd[0]         gcd[1]
In [5]:
names = ['fN[0][0]','fN[1][0]','fN[2][0]','fN[3][0]',
 'id[0][0]','id[1][0]','id[2][0]','id[3][0]',
 'x[0]','x[1]',
 'errx[0][0]','errx[0][1]','errx[1][0]','errx[1][1]','errx[2][0]',
 'errx[2][1]','errx[3][0]','errx[3][1]',
 'g[0][0]','g[0][1]','g[1][0]','g[1][1]','g[2][0]','g[2][1]','g[3][0]','g[3][1]',
 'ellip[0][0]','ellip[1][0]','ellip[2][0]','ellip[3][0]',
 'flux[0][0]','flux[1][0]','flux[2][0]','flux[3][0]',
 'radius[0][0]','radius[1][0]','radius[2][0]','radius[3][0]',
 'mag[0][0]','mag[1][0]','mag[2][0]','mag[3][0]',
 'gm[0]','gm[1]','gc[0]', 'gc[1]',
 'gmd[0]','gmd[1]','gcd[0]','gcd[1]']


file_path = '../data/cleancat/final_text_cleancat15_000_099.txt'


df = pd.read_csv(file_path,comment='#',engine='python',sep=r'\s\s+',
                 header=None,names=names)

print(df.shape)

# new columns
# df['g_sq'] = df['g[0][0]'] **2 + df['g[1][0]']**2 # only for imcat 00 and 10
# df['gmd_sq'] = df['gmd[0]'] **2 + df['gmd[1]']**2

df['g_sq'] = df['g[0][0]'] **2 + df['g[0][1]']**2
df['gmd_sq'] = df['gmd[0]'] **2 + df['gmd[1]']**2

df['gm_sq'] = df['gm[0]']**2 + df['gm[1]']**2
df['gc_sq'] = df['gc[0]']**2 + df['gc[1]']**2

df['mag_mono'] = (df['mag[0][0]'] + df['mag[1][0]'] ) / 2
df['mag_chro'] = (df['mag[2][0]'] + df['mag[3][0]'] ) / 2

df.head()
(56861, 50)
Out[5]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro
0 0 0 0 0 5301 5314 5231 5117 88.17075 1847.19340 0.0196 0.0249 0.0227 0.0216 0.0200 0.0256 0.0231 0.0220 -0.4253 0.1855 0.2730 -0.3021 -0.4257 0.1904 0.2778 -0.3155 0.463994 0.407177 0.466340 0.420373 79841.4700 82737.3540 80303.9230 83923.9080 5.186953 5.293858 5.267827 5.390682 -12.255571 -12.294254 -12.261842 -12.309714 -0.07615 -0.05830 -0.07395 -0.06255 -0.34915 0.24380 -0.35175 0.25295 0.215290 0.181344 0.009198 0.009381 -12.274912 -12.285778
1 0 0 0 0 3941 3957 3897 3923 3214.45390 930.33603 0.0344 0.0212 0.0331 0.0232 0.0344 0.0212 0.0332 0.0232 0.9068 0.3231 0.7867 0.3391 0.9179 0.3265 0.7956 0.3416 0.962642 0.856671 0.974240 0.865835 33913.5470 34112.9040 33903.5430 34114.7980 4.676457 4.750963 4.675408 4.751770 -11.325933 -11.332297 -11.325613 -11.332357 0.84675 0.33110 0.85675 0.33405 0.06005 -0.00800 0.06115 -0.00755 0.926680 0.003670 0.826613 0.845610 -11.329115 -11.328985
2 0 0 0 0 1301 1310 1323 1312 2652.56650 1772.34480 0.2510 0.1715 0.1663 0.3002 0.2522 0.1715 0.1665 0.3017 0.9614 0.5881 -0.9979 -0.4635 1.0062 0.6076 -1.0206 -0.4729 1.127010 1.100289 1.175422 1.124837 3694.2411 3674.4453 3684.1640 3663.4596 4.161950 4.303319 4.159870 4.301257 -8.918813 -8.912980 -8.915847 -8.909728 -0.01825 0.06230 -0.00720 0.06735 0.97965 0.52580 1.01340 0.54025 1.270152 1.236180 0.004214 0.004588 -8.915896 -8.912788
3 0 0 0 0 3564 3564 3541 3538 2536.84490 712.48793 0.0071 0.0125 0.0071 0.0129 0.0074 0.0129 0.0074 0.0134 -1.0289 0.4499 -1.0196 0.3961 -0.9862 0.4332 -0.9816 0.3755 1.122963 1.093837 1.077150 1.050970 107866.5700 109330.9900 109214.9900 110405.2400 4.848973 4.967938 4.963241 5.096215 -12.582217 -12.596858 -12.595706 -12.607474 -1.02425 0.42300 -0.98390 0.40435 -0.00465 0.02690 -0.00230 0.02885 1.261045 0.000745 1.228017 1.131558 -12.589538 -12.601590
4 0 0 0 0 4634 4659 4569 4615 109.82575 1405.32120 0.3760 0.3919 0.2353 0.2783 0.3803 0.3949 0.2379 0.2831 0.2055 0.1655 -0.1817 -0.0868 0.2052 0.1698 -0.1773 -0.0987 0.263857 0.201368 0.266344 0.202921 3512.8911 3518.9333 3517.0462 3518.1529 4.338535 4.362407 4.374177 4.392268 -8.864162 -8.866028 -8.865445 -8.865787 0.01190 0.03935 0.01395 0.03555 0.19360 0.12615 0.19125 0.13425 0.069621 0.053395 0.001690 0.001458 -8.865095 -8.865616

Plot g-squared for monochromatic and chromatic files

In [6]:
fig,ax = plt.subplots(1,2,figsize=(14,6))

df['gm_sq'].plot.hist(bins=50,ax=ax[0],color='b',label='mono')
ax[0].set_xlabel('gm_sq')
ax[0].legend()
ax[0].set_title('g-squared histogram for mono')

# gcsq
df['gc_sq'].plot.hist(bins=50,ax=ax[1],color='r',label='chro')
ax[1].set_xlabel('gc_sq')
ax[1].legend()
ax[1].set_title('g-squared histogram for chro')

# savefig
plt.savefig('images/gmsq_gcsq_hist.png')
In [7]:
plt.figure(figsize=(12,8))
sns.distplot(df['g_sq'],label='g_sq')
sns.distplot(df['gmd_sq'],label='gmd_sq')

plt.legend()
Out[7]:
<matplotlib.legend.Legend at 0x11a675320>

Contour Plots

In [8]:
import plotly
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.tools as tls
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)
In [9]:
def matrix_of_number_density_from_two_cols(df,xcol,ycol,N):
    """Create grid of number density of two columns
    
    - Find the absolute max from two columns.
    - Create N bins -absMax to +absMax.
    - Return a matrix of N*N shape.
    """
    from itertools import product
    
    # derived variables
    xlabel = xcol
    ylabel = ycol
    xlabel1 = xlabel + '_cat_labels'
    ylabel1 = ylabel + '_cat_labels'
    
    xlabel2 = xlabel + '_cat'
    ylabel2 = ylabel + '_cat'
    colname = 'cat_freq'
    
    # take only xlabel and ylabel columns
    dx = df[[xlabel, ylabel]].copy()
    
    # get absolute maximum from two columns
    tolerance = 0.0000001
    max_abs_xcol_ycol = df[[xcol,ycol]].describe().iloc[[3,-1],:].abs().max().max()
    
    # create 1d array with N+1 values to create N bins
#     bins = np.linspace(-max_abs_xcol_ycol-tolerance, max_abs_xcol_ycol+tolerance,N+1)
    bins = np.linspace(0, max_abs_xcol_ycol,N+1)

    # create N bins
    dx[xlabel1] = pd.cut(dx[xlabel], bins, labels=np.arange(N))
    dx[ylabel1] = pd.cut(dx[ylabel], bins, labels=np.arange(N))

    # count number of points in each bin
    dx[colname] = dx.groupby([xlabel1,ylabel1])[xlabel1].transform('count')

    # drop duplicates
    dx1 = dx.drop_duplicates(subset=[xlabel1,ylabel1])[[xlabel1,ylabel1,colname]]

    # use permutation to get the grid of N * N
    perms = list(product(range(N), range(N)))
    x = [i[0] for i in perms]
    y = [i[1] for i in perms]
    dx2 = pd.DataFrame({xlabel1: x, ylabel1: y, colname:0})

    # update dx2 to merge frequency values
    dx2.update(dx2.drop(colname,1).merge(dx1,how='left'))
    dx2 = dx2.astype(int)

    # z values to plot heatmap
    z = dx2[colname].values.reshape(N,N)
    z = z.T

    return z

Transform and scale the data

In [10]:
def transform_scale(z,transform='linear',scale=None):
    """Transform and scale given 1d numpy array.
    
    transform: linear, log, sqrt, sinh, arcsinh
    scale    : minmax, zscale
    
    """
    #==================================
    # linear, log, sqrt, arcsinh, sinh 
    #
    # we need linear tranform option to compare.
    if transform == 'linear':
        z = z

    if transform == 'log':
        z = np.log1p(z)
        
    if transform == 'sqrt':
        z = np.sqrt(z)

    if transform == 'sinh':
        z = np.sinh(z)
        
    if transform == 'arcsinh':
        z = np.arcsinh(z)
    
    #===============================
    # scaling minmax and zscale
    if scale== 'minmax':
        z = z / (z.max()-z.min())
    if scale == 'zscale':
        z = (z-z.mean()) / z.std()
        
    return z

plot the countours

In [11]:
def plot_contour(Z,colorscale,x1y1x2y2=None,
                 title='Contour plot',
                 xlabel='',
                 ylabel=''):
    """Plot the contour plot.

    Contour plot from given 2d array.
    
    Example:
    -----------
    z  = matrix_of_number_density_from_two_cols(df,'gsq','gmdsq',N)
    z1 = transform_scale(z,transform=transform,scale=scale)

    plot_contour(z1, colorscale='Viridis', x1y1x2y2=[10,0,99,90],
            title=f'Contour plot: {transform}+{scale}',
            xlabel='Bin number of gsq',
            ylabel='Bin number of gmsq')
    
    """

    trace1 = go.Contour(z=Z, colorscale=colorscale)
    axis_layout = dict(
        showticklabels = True
    )
    
    xaxis = {**axis_layout, **{'title':f'{xlabel}'}}
    yaxis = {**axis_layout, **{'title':f'{ylabel}'}}

    layout = go.Layout(
        width=1000,
        height=1000,
        autosize=False,
        title=f'{title} ',
        xaxis = xaxis,
        yaxis = yaxis,
    )
    

    data = [trace1]
    
    if x1y1x2y2:

        # center line 
        p2x1, p2y1 = 0,0
        p2x2, p2y2 = 99,99
        sc1 = go.Scatter(x=[p2x1,p2x2],
                         y=[p2y1,p2y2],
                         mode = 'lines+markers',
                         name = f'line ({p2x1},{p2y1}) to ({p2x2},{p2y2})')
        
        sc2 = go.Scatter(x=[x1y1x2y2[0],x1y1x2y2[2]],
                         y=[x1y1x2y2[1],x1y1x2y2[3]],
                         mode = 'lines+markers',
                         name = f'line ({x1y1x2y2[0]},{x1y1x2y2[1]}) \
                         to ({x1y1x2y2[2]},{x1y1x2y2[3]})')


        data = [trace1, sc1, sc2]

    fig = go.Figure( data=data, layout=layout )
    
    # update figure
    fig.update_layout(
    xaxis = dict(tickmode = 'linear',dtick = 5),
    yaxis = dict(tickmode = 'linear',dtick = 5),
    )

    iplot(fig)
    return None

N = 100
transform='log'
scale='zscale'


xcol,ycol = 'g_sq', 'gmd_sq'
z  = matrix_of_number_density_from_two_cols(df,xcol,ycol,N)
z1 = transform_scale(z,transform=transform,scale=scale)

plot_contour(z1, colorscale='Viridis', x1y1x2y2=[10,0,99,90],
            title=f'Contour plot: {transform}+{scale}',
            xlabel=f'Bin number of {xcol}',
            ylabel=f'Bin number of {ycol}')

Grid of N*N from g_sq and gmd_sq

In [12]:
N = 100
xcol = 'g_sq'
ycol = 'gmd_sq'
max_abs_xcol_ycol = df[[xcol,ycol]].abs().max().max()

print(max_abs_xcol_ycol)

df[df[xcol]==max_abs_xcol_ycol]
3.99436938
Out[12]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro
21605 38 38 38 38 6451 6450 6448 6407 2519.428 2550.8574 0.0455 0.0406 0.0467 0.0424 0.0459 0.0408 0.0473 0.0426 1.9293 0.5217 1.5932 -0.1021 1.719 0.451 1.4994 -0.1034 1.998592 1.596468 1.777178 1.502961 12487.845 12648.607 12818.484 12914.576 4.594356 4.671837 4.688169 4.746876 -10.241219 -10.255107 -10.269592 -10.2777 1.76125 0.2098 1.6092 0.1738 0.16805 0.3119 0.1098 0.2772 3.994369 0.125522 3.146018 2.619731 -10.248163 -10.273646
In [13]:
# create 1d array with N+1 values to create N bins
bins = np.linspace(0, max_abs_xcol_ycol,N+1)

# bins dict
bins_dict = {i:v for i,v in enumerate(bins)}

# create N bins
ser_ycol_bins = pd.cut(df[ycol], bins=bins,)

df['g_sq_bins'] = ser_ycol_bins
df[['g_sq','g_sq_bins']].head()
Out[13]:
g_sq g_sq_bins
0 0.215290 (0.16, 0.2]
1 0.926680 (0.0, 0.0399]
2 1.270152 (1.198, 1.238]
3 1.261045 (0.0, 0.0399]
4 0.069621 (0.0399, 0.0799]
In [14]:
# bin 0  ==> 0          to 0.03994369
# bin 1  ==> 0.03994369 to 0.07988739
# bin 10 ==> 0.39943694 to 0.43938063

bins[10], bins[11]
Out[14]:
(0.399436938, 0.4393806318)

Analysis of Points above gmd_sq bin number 10

What is the corresponding gmsq value to y-axis bin number 10 (11th bin)?

The 100*100 bin is created from absMax of g_sq and gmd_sq. Then the line 0 to absMax is divided into 100 parts and bins are created.

bin 10 for gmd_sq is 0.39943694 to 0.43938063

In [15]:
# take all points where gmd_sq > 10th bin

df_gmd_sq_lt_bin10 = df[df.gmd_sq > bins[10]]

print(df_gmd_sq_lt_bin10.shape)

df_gmd_sq_lt_bin10.head()
(14158, 57)
Out[15]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins
2 0 0 0 0 1301 1310 1323 1312 2652.56650 1772.3448 0.2510 0.1715 0.1663 0.3002 0.2522 0.1715 0.1665 0.3017 0.9614 0.5881 -0.9979 -0.4635 1.0062 0.6076 -1.0206 -0.4729 1.127010 1.100289 1.175422 1.124837 3694.2411 3674.4453 3684.164 3663.4596 4.161950 4.303319 4.159870 4.301257 -8.918813 -8.912980 -8.915847 -8.909728 -0.01825 0.06230 -0.00720 0.06735 0.97965 0.52580 1.01340 0.54025 1.270152 1.236180 0.004214 0.004588 -8.915896 -8.912788 (1.198, 1.238]
5 0 0 0 0 5592 1389 5393 1394 1668.12470 1882.8265 0.0453 0.0393 0.0394 0.0218 0.0457 0.0395 0.0393 0.0218 0.2646 0.9514 0.8981 -0.2964 0.2685 0.9593 0.9048 -0.3017 0.987510 0.945747 0.996167 0.953775 49474.3730 45594.8360 49496.897 45655.9450 5.496548 5.503342 5.499099 5.507981 -11.735951 -11.647289 -11.736445 -11.648743 0.58135 0.32750 0.58665 0.32880 -0.31675 0.62390 -0.31815 0.63050 0.975175 0.489582 0.445224 0.452268 -11.691620 -11.692594 (0.479, 0.519]
8 0 0 0 0 5895 5967 5826 5909 1448.97510 2334.9766 0.0188 0.0227 0.0239 0.0182 0.0189 0.0229 0.0241 0.0183 -0.2900 0.9268 0.6441 -0.7562 -0.3066 0.9498 0.6584 -0.7697 0.971112 0.993329 0.998060 1.012881 44277.4560 44510.9310 44328.676 44646.1230 4.390932 4.486290 4.404279 4.505218 -11.615457 -11.621167 -11.616712 -11.624459 0.17705 0.08530 0.17590 0.09005 -0.46705 0.84150 -0.48250 0.85975 0.943058 0.926258 0.038623 0.039050 -11.618312 -11.620586 (0.919, 0.959]
16 0 0 0 0 1916 1935 1936 1936 987.00513 2672.3523 0.0075 0.0098 0.0099 0.0079 0.0075 0.0098 0.0100 0.0079 -0.4763 0.8956 0.6350 -0.8553 -0.5089 0.9281 0.6544 -0.8827 1.014377 1.065253 1.058465 1.098817 91854.4360 93143.4830 91915.122 93209.8860 4.097640 4.188533 4.106857 4.199608 -12.407750 -12.422881 -12.408467 -12.423655 0.07935 0.02015 0.07275 0.02270 -0.55565 0.87545 -0.58165 0.90540 1.028961 1.075160 0.006702 0.005808 -12.415316 -12.416061 (1.039, 1.078]
17 0 0 0 0 4798 1156 4737 1155 1993.05820 1545.4589 0.0838 0.0597 0.0636 0.1048 0.0842 0.0598 0.0637 0.1052 1.0009 -0.3474 -0.7955 0.6838 1.0567 -0.3781 -0.8061 0.6933 1.059475 1.049001 1.122308 1.063232 10896.1520 10999.3280 10882.732 10990.3080 4.145457 4.541074 4.145158 4.539750 -10.093183 -10.103415 -10.091845 -10.102525 0.10270 0.16820 0.12530 0.15760 0.89820 -0.51560 0.93140 -0.53570 1.122488 1.072607 0.038839 0.040538 -10.098299 -10.097185 (1.039, 1.078]
In [16]:
# fN[0][0]  ==> lsst_mono_z1.5_000.fits
# fN[1][0]  ==> lsst_mono90_z1.5_000.fits
#
# id[0][0]  ==> id of given object for mono file number 0
In [17]:
# take only first file number 0 (it has m,m9,c and c9)

df_gmd_sq_lt_bin10_file0 = df_gmd_sq_lt_bin10[df_gmd_sq_lt_bin10['fN[0][0]'] == 0]

# add radius for mono
df_gmd_sq_lt_bin10_file0['radius_mono'] = \
(df_gmd_sq_lt_bin10_file0['radius[0][0]'] + 
 df_gmd_sq_lt_bin10_file0['radius[1][0]'] ) /2.0

print(df_gmd_sq_lt_bin10_file0.shape)

df_gmd_sq_lt_bin10_file0.head()
(120, 58)
Out[17]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins radius_mono
2 0 0 0 0 1301 1310 1323 1312 2652.56650 1772.3448 0.2510 0.1715 0.1663 0.3002 0.2522 0.1715 0.1665 0.3017 0.9614 0.5881 -0.9979 -0.4635 1.0062 0.6076 -1.0206 -0.4729 1.127010 1.100289 1.175422 1.124837 3694.2411 3674.4453 3684.164 3663.4596 4.161950 4.303319 4.159870 4.301257 -8.918813 -8.912980 -8.915847 -8.909728 -0.01825 0.06230 -0.00720 0.06735 0.97965 0.52580 1.01340 0.54025 1.270152 1.236180 0.004214 0.004588 -8.915896 -8.912788 (1.198, 1.238] 4.232634
5 0 0 0 0 5592 1389 5393 1394 1668.12470 1882.8265 0.0453 0.0393 0.0394 0.0218 0.0457 0.0395 0.0393 0.0218 0.2646 0.9514 0.8981 -0.2964 0.2685 0.9593 0.9048 -0.3017 0.987510 0.945747 0.996167 0.953775 49474.3730 45594.8360 49496.897 45655.9450 5.496548 5.503342 5.499099 5.507981 -11.735951 -11.647289 -11.736445 -11.648743 0.58135 0.32750 0.58665 0.32880 -0.31675 0.62390 -0.31815 0.63050 0.975175 0.489582 0.445224 0.452268 -11.691620 -11.692594 (0.479, 0.519] 5.499945
8 0 0 0 0 5895 5967 5826 5909 1448.97510 2334.9766 0.0188 0.0227 0.0239 0.0182 0.0189 0.0229 0.0241 0.0183 -0.2900 0.9268 0.6441 -0.7562 -0.3066 0.9498 0.6584 -0.7697 0.971112 0.993329 0.998060 1.012881 44277.4560 44510.9310 44328.676 44646.1230 4.390932 4.486290 4.404279 4.505218 -11.615457 -11.621167 -11.616712 -11.624459 0.17705 0.08530 0.17590 0.09005 -0.46705 0.84150 -0.48250 0.85975 0.943058 0.926258 0.038623 0.039050 -11.618312 -11.620586 (0.919, 0.959] 4.438611
16 0 0 0 0 1916 1935 1936 1936 987.00513 2672.3523 0.0075 0.0098 0.0099 0.0079 0.0075 0.0098 0.0100 0.0079 -0.4763 0.8956 0.6350 -0.8553 -0.5089 0.9281 0.6544 -0.8827 1.014377 1.065253 1.058465 1.098817 91854.4360 93143.4830 91915.122 93209.8860 4.097640 4.188533 4.106857 4.199608 -12.407750 -12.422881 -12.408467 -12.423655 0.07935 0.02015 0.07275 0.02270 -0.55565 0.87545 -0.58165 0.90540 1.028961 1.075160 0.006702 0.005808 -12.415316 -12.416061 (1.039, 1.078] 4.143086
17 0 0 0 0 4798 1156 4737 1155 1993.05820 1545.4589 0.0838 0.0597 0.0636 0.1048 0.0842 0.0598 0.0637 0.1052 1.0009 -0.3474 -0.7955 0.6838 1.0567 -0.3781 -0.8061 0.6933 1.059475 1.049001 1.122308 1.063232 10896.1520 10999.3280 10882.732 10990.3080 4.145457 4.541074 4.145158 4.539750 -10.093183 -10.103415 -10.091845 -10.102525 0.10270 0.16820 0.12530 0.15760 0.89820 -0.51560 0.93140 -0.53570 1.122488 1.072607 0.038839 0.040538 -10.098299 -10.097185 (1.039, 1.078] 4.343265

Regions above and below for gsq vs gmdsq contour plot

For example, take points:
Lower line below the diagonal line
point on x-axis: x1,y1 = 10,0
point on y-axis: x2,y2 = 99,90

here 20,0,99,80 are bin number, their values are obtained from bins_dict
x1,y1 = bins_dict[10], bins_dict[0]
x2,y2 = bins_dict[99], bins_dict[90]


Equation of straight line:
y-y1 = y2-y1 * (x-x1)
       -----
       x2-x1

boundary: (x2-x1) * (y-y1) - (y2-y1) * (x-x1)
In [18]:
N = 100
bins = np.linspace(0, max_abs_xcol_ycol,N+1)
bins_dict = {i:v for i,v in enumerate(bins)}

# points from contour plot
x1y1x2y2 = [10,0,99,90]
print(x1y1x2y2)

# actual values of x1, y1, x2, and y2
x1,y1 = bins_dict[x1y1x2y2[0]], bins_dict[x1y1x2y2[1]]
x2,y2 = bins_dict[x1y1x2y2[2]], bins_dict[x1y1x2y2[3]]


df['above'] = df.eval(" ( (@x2-@x1) * gmd_sq )  >= ( (@y2-@y1) * (g_sq - @x1 )) ")


print(df['above'].value_counts())
print()
print(df['above'].value_counts(normalize=True))
[10, 0, 99, 90]
True     31077
False    25784
Name: above, dtype: int64

True     0.546543
False    0.453457
Name: above, dtype: float64
In [19]:
df[df.above==True]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='b',label='Above')

nabove = len(df[df.above==True])
nbelow = len(df[df.above==False])

above_pct = nabove/(nabove+nbelow)*100
below_pct = 100-above_pct
text = f'Above = {nabove:,} ({above_pct:,.0f}%)\nBelow = {nbelow:,} ( {below_pct:,.0f}%)'

plt.text(0.5,10_000,text,fontsize=14)
plt.legend()

plt.xlabel('gm_sq')
plt.title('gm_sq histograms for above and below cases', fontsize=20)

df[df.above==False]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5,label='Below')
plt.legend()
plt.xlabel('gm_sq')
plt.title('gm_sq histograms below the boundary', fontsize=20)
Out[19]:
Text(0.5, 1.0, 'gm_sq histograms below the boundary')
In [20]:
df[df.above==True]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5)
plt.xlabel('gm_sq')
plt.title('gm_sq histogram', fontsize=20)
Out[20]:
Text(0.5, 1.0, 'gm_sq histogram')
In [21]:
df[df.above==False]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5,label='Below')
plt.legend()
plt.xlabel('gm_sq')
plt.title('gm_sq histograms below the boundary', fontsize=20)
Out[21]:
Text(0.5, 1.0, 'gm_sq histograms below the boundary')

Now, Take only the data above the boundary as cleaned data

In [22]:
rows_before = df.shape[0]
df = df[df.above==True]

print(f'Before rows: {rows_before}, After rows: {df.shape[0]}')
Before rows: 56861, After rows: 31077
In [23]:
plt.figure(figsize=(12,8))
sns.distplot(df['gm_sq'],label='gm_sq')
plt.legend()
plt.savefig('images/gmsq_histogram_after_cleaning.png',dpi=300)

gm vs gc Plots

Equation of straight line:
y-y1 = y2-y1 * (x-x1)
       -----
       x2-x1

boundary: (x2-x1) * (y-y1) - (y2-y1) * (x-x1)
In [24]:
fig,ax = plt.subplots(1,2,figsize=(16,8))
df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])

ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)

ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)

# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')

plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);

Remove outliers based on gm vs gc plot

In [25]:
# Two scatter plots
#===========================================
fig,ax = plt.subplots(1,2,figsize=(16,8))
plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);

df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])

ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)

ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)

# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')



# Left plot gm0 vs gc0 parallel lines
#===========================================
x = df['gm[0]'].to_numpy()
y = df['gc[0]'].to_numpy()

# Find distance for parallel lines
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2) 
y1 = x + c
y2 = x - c

# left plot parallel lines
ax[0].plot(y1,x,'b-.',label=f'{n} sigma')
ax[0].plot(y2,x,'b-.')


# Right plot gm1 vs gc1 parallel lines
#===========================================
x = df['gm[1]'].to_numpy()
y = df['gc[1]'].to_numpy()

sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2) 
y1 = x + c
y2 = x - c

ax[1].plot(y1,x,'b-.', label=f'{n} sigma')
ax[1].plot(y2,x,'b-.')


#=============
# Add legends
# plt.tight_layout()
ax[0].legend(loc='upper left')
ax[1].legend(loc='upper left')

plt.show()
In [26]:
# Remove outliers for gc0 vs gm0
n = 10
sigma = (df['gm[0]'] - df['gc[0]']).std()
d = n * sigma
c = d/np.sqrt(2)

cond_upper = (df['gc[0]'] - df['gm[0]'] <= c)
cond_below = (df['gm[0]'] - df['gc[0]'] <= c)

df = df[ cond_upper & cond_below ]


# df.plot.scatter(x='gm[0]',y='gc[0]')
In [27]:
# Remove outliers for gc1 vs gm1
n = 10
sigma = (df['gm[1]'] - df['gc[1]']).std()
d = n * sigma
c = d/np.sqrt(2)

cond_upper = (df['gc[1]'] - df['gm[1]'] <= c)
cond_below = (df['gm[1]'] - df['gc[1]'] <= c)

df = df[ cond_upper & cond_below ]


# df.plot.scatter(x='gm[1]',y='gc[1]')
In [28]:
# Two scatter plots
#===========================================
fig,ax = plt.subplots(1,2,figsize=(16,8))
plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);

df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])

ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)

ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)

# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')



# Left plot gm0 vs gc0 parallel lines
#===========================================
x = df['gm[0]'].to_numpy()
y = df['gc[0]'].to_numpy()

# Find distance for parallel lines
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2) 
y1 = x + c
y2 = x - c

# left plot parallel lines
ax[0].plot(y1,x,'b-.',label=f'{n} sigma')
ax[0].plot(y2,x,'b-.')




# Right plot gm1 vs gc1 parallel lines
#===========================================
x = df['gm[1]'].to_numpy()
y = df['gc[1]'].to_numpy()

sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2) 
y1 = x + c
y2 = x - c

ax[1].plot(y1,x,'b-.', label=f'{n} sigma')
ax[1].plot(y2,x,'b-.')


#=============
# Add legends
# plt.tight_layout()
ax[0].legend(loc='upper left')
ax[1].legend(loc='upper left')

plt.show()

Plot magnidude for different bin numbers

In [29]:
df.filter(regex='mag').head(2)
Out[29]:
mag[0][0] mag[1][0] mag[2][0] mag[3][0] mag_mono mag_chro
0 -12.255571 -12.294254 -12.261842 -12.309714 -12.274912 -12.285778
2 -8.918813 -8.912980 -8.915847 -8.909728 -8.915896 -8.912788
In [30]:
def plot_bin_mag_mono_chro(nbins,show=False):
    import os
    
    if not os.path.isdir('images'):
        os.makedirs('images')
    
    df['bins_mag_mono'] = pd.cut(df['mag_mono'],nbins)
    df['bins_mag_chro'] = pd.cut(df['mag_chro'],nbins)
    
    text_mono = df.groupby('bins_mag_mono')['gm_sq'].count().to_string()
    text_chro = df.groupby('bins_mag_chro')['gc_sq'].count().to_string()

    # plot
    fig,ax = plt.subplots(1,2,figsize=(12,8))
    
    # mono
    df.groupby('bins_mag_mono')['gm_sq'].mean().plot(marker='o',ax=ax[0])
    ax[0].tick_params(axis='x', rotation=90)
    ax[0].set_ylabel('gm_sq_mean',fontsize=18)
    ax[0].set_xlabel('bin_mag_mono',fontsize=18)
    ax[0].set_title(f'gm_sq per magnitude bins with nbins = {nbins}')
    ax[0].text(0,0.5,text_mono,fontsize=14,va='center')
    ax[0].set_ylim(0,1)
    ax[0].set_yticks(np.arange(0, 1, step=0.1))
    
    # chro
    df.groupby('bins_mag_chro')['gc_sq'].mean().plot(marker='o',ax=ax[1])
    ax[1].tick_params(axis='x', rotation=90)
    ax[1].set_ylabel('gc_sq_mean',fontsize=18)
    ax[1].set_xlabel('bin_mag_chro',fontsize=18)
    ax[1].set_title(f'gc_sq per magnitude bins with nbins = {nbins}')
    ax[1].text(0,0.5,text_chro,fontsize=14,va='center')
    ax[1].set_ylim(0,1)
    ax[1].set_yticks(np.arange(0, 1, step=0.1))
    
    plt.tight_layout()
    
    plt.savefig(f'images/bin_mag_mono_chro_{nbins}.png')
    

    if show:
        plt.show()
    plt.close()

for nbins in range(5,15):
    plot_bin_mag_mono_chro(nbins,show=True)

Magnitude weight column for Monochromatic case

In [31]:
df['mag_mono'].plot.hist()
Out[31]:
<matplotlib.axes._subplots.AxesSubplot at 0x11d002240>
In [32]:
from scipy.optimize import curve_fit

# look at case when nbins = 9 and when the curve is going up
mag_low_nbins9 = (-12.884-12.028) / 2
mag_high_nbins9 = (-10.314-9.457) / 2

xcol = 'mag_mono'
ycol = 'gm_sq'

x = df.query("""  @mag_low_nbins9 < mag_mono <  @mag_high_nbins9  """)[xcol].to_numpy()
y = df.query("""  @mag_low_nbins9 < mag_mono <  @mag_high_nbins9  """)[ycol].to_numpy()

def func(x, a, b):
    return a*x + b

params, _ = curve_fit(func, x, y)
[a, b] = params.round(2)

print(f'magnitude ranges for mono: {mag_low_nbins9}, {mag_high_nbins9}')
print(f'fitting params   for mono: {a}, {b}' )
magnitude ranges for mono: -12.456, -9.8855
fitting params   for mono: 0.04, 0.54
In [33]:
def magnitude_weight_mono(mag):
    if mag < -12.456:
        return 1/ (-12.456 *a + b)
    
    else:
        return 1/ (a*mag + b)

df['wt_mag_mono'] = df['mag_mono'].apply(magnitude_weight_mono)
df['wt_mag_mono'] = df['wt_mag_mono'] / df['wt_mag_mono'].mean() # normalize by mean

Magnitude weight column for Chromatic case

In [34]:
df['mag_chro'].plot.hist()
Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x123f1e828>
In [35]:
from scipy.optimize import curve_fit

# look at case when nbins = 9 and when the curve is going up
mag_low_nbins9 = (-12.895-12.041) / 2
mag_high_nbins9 = (-10.333-9.479) / 2

xcol = 'mag_chro'
ycol = 'gc_sq'

x = df.query("""  @mag_low_nbins9 < mag_chro <  @mag_high_nbins9  """)[xcol].to_numpy()
y = df.query("""  @mag_low_nbins9 < mag_chro <  @mag_high_nbins9  """)[ycol].to_numpy()

def func(x, a, b):
    return a*x + b

params, _ = curve_fit(func, x, y)
[a, b] = params.round(2)

print(f'magnitude ranges for chro: {mag_low_nbins9}, {mag_high_nbins9}')
print(f'fitting params   for chro: {a}, {b}' )
magnitude ranges for chro: -12.468, -9.905999999999999
fitting params   for chro: 0.04, 0.55
In [36]:
def magnitude_weight_chro(mag):
    if mag < -12.468:
        return 1/ (-12.468*a + b)
    
    else:
        return 1/ (a*mag + b)

df['wt_mag_chro'] = df['mag_chro'].apply(magnitude_weight_chro)
df['wt_mag_chro'] = df['wt_mag_chro'] / df['wt_mag_chro'].mean() # normalize by mean

# mean
df['wt_mag']      = (df['wt_mag_mono'] + df['wt_mag_chro']) / 2

# df.drop(['wt_mag_chro','wt_mag_mono'],axis=1,inplace=True)

df.iloc[:2,-7:]
Out[36]:
g_sq_bins above bins_mag_mono bins_mag_chro wt_mag_mono wt_mag_chro wt_mag
0 (0.16, 0.2] True (-12.728, -12.268] (-12.724, -12.263] 1.456448 1.425518 1.440983
2 (1.198, 1.238] True (-9.044, -8.584] (-9.033, -8.571] 0.389231 0.431504 0.410367

Ellipticity Components Transformation

c2 = (dx * dx - dy * dy) / (r * r);
s2 = 2 * dx * dy / (r * r);
eX = s2 * e[0] + c2 * e[1];
eesum += eX * eX * w[0] * w[0];
eTsum[bin] -= (c2 * e[0] + s2 * e[1]) * w[0];
In [37]:
df.head(2)
Out[37]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins above bins_mag_mono bins_mag_chro wt_mag_mono wt_mag_chro wt_mag
0 0 0 0 0 5301 5314 5231 5117 88.17075 1847.1934 0.0196 0.0249 0.0227 0.0216 0.0200 0.0256 0.0231 0.0220 -0.4253 0.1855 0.2730 -0.3021 -0.4257 0.1904 0.2778 -0.3155 0.463994 0.407177 0.466340 0.420373 79841.4700 82737.3540 80303.923 83923.9080 5.186953 5.293858 5.267827 5.390682 -12.255571 -12.294254 -12.261842 -12.309714 -0.07615 -0.0583 -0.07395 -0.06255 -0.34915 0.2438 -0.35175 0.25295 0.215290 0.181344 0.009198 0.009381 -12.274912 -12.285778 (0.16, 0.2] True (-12.728, -12.268] (-12.724, -12.263] 1.456448 1.425518 1.440983
2 0 0 0 0 1301 1310 1323 1312 2652.56650 1772.3448 0.2510 0.1715 0.1663 0.3002 0.2522 0.1715 0.1665 0.3017 0.9614 0.5881 -0.9979 -0.4635 1.0062 0.6076 -1.0206 -0.4729 1.127010 1.100289 1.175422 1.124837 3694.2411 3674.4453 3684.164 3663.4596 4.161950 4.303319 4.159870 4.301257 -8.918813 -8.912980 -8.915847 -8.909728 -0.01825 0.0623 -0.00720 0.06735 0.97965 0.5258 1.01340 0.54025 1.270152 1.236180 0.004214 0.004588 -8.915896 -8.912788 (1.198, 1.238] True (-9.044, -8.584] (-9.033, -8.571] 0.389231 0.431504 0.410367
In [38]:
# constants
RMIN = 10
DLNR = 0.5

df['dx'] = df['x[0]'] - 1699 # jesisim fitsfiles have shape 3398, 3398
df['dy'] = df['x[1]'] - 1699

df['r'] = np.hypot(df['dx'], df['dy'])
# df['r'] = np.sqrt(df['dx']**2 + df['dy']**2)

df['cos2theta'] = df.eval(' (dx * dx - dy * dy) / (r * r)' )
df['sin2theta'] = df.eval(' (2  * dx * dy     ) / (r * r)' )

df['bin'] = ( np.log(df.r / RMIN) / DLNR).astype(int)

df['bin'].value_counts()
Out[38]:
9     12232
10    11073
8      4765
7      2001
6       707
5        79
3        35
4         8
2         6
1         5
0         5
Name: bin, dtype: int64
In [39]:
df['eX_mono'] =       df['sin2theta'] * df['gm[0]'] + df['cos2theta'] * df['gm[1]']
df['eT_mono'] = -1 * (df['cos2theta'] * df['gm[0]'] + df['sin2theta'] * df['gm[1]'] ) 

df['eX_chro'] =       df['sin2theta'] * df['gc[0]'] + df['cos2theta'] * df['gc[1]']
df['eT_chro'] = -1 * (df['cos2theta'] * df['gc[0]'] + df['sin2theta'] * df['gc[1]']  )
In [40]:
df['eT_mono_times_wt'] = df['eT_mono'] * df['wt_mag']
df['eT_chro_times_wt'] = df['eT_chro'] * df['wt_mag']
In [41]:
df.head()
Out[41]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins above bins_mag_mono bins_mag_chro wt_mag_mono wt_mag_chro wt_mag dx dy r cos2theta sin2theta bin eX_mono eT_mono eX_chro eT_chro eT_mono_times_wt eT_chro_times_wt
0 0 0 0 0 5301 5314 5231 5117 88.17075 1847.1934 0.0196 0.0249 0.0227 0.0216 0.0200 0.0256 0.0231 0.0220 -0.4253 0.1855 0.2730 -0.3021 -0.4257 0.1904 0.2778 -0.3155 0.463994 0.407177 0.466340 0.420373 79841.4700 82737.3540 80303.9230 83923.9080 5.186953 5.293858 5.267827 5.390682 -12.255571 -12.294254 -12.261842 -12.309714 -0.07615 -0.05830 -0.07395 -0.06255 -0.34915 0.24380 -0.35175 0.25295 0.215290 0.181344 0.009198 0.009381 -12.274912 -12.285778 (0.16, 0.2] True (-12.728, -12.268] (-12.724, -12.263] 1.456448 1.425518 1.440983 -1610.82925 148.1934 1617.631650 0.983215 -0.182452 10 -0.043428 0.064235 -0.048008 0.061296 0.092561 0.088327
2 0 0 0 0 1301 1310 1323 1312 2652.56650 1772.3448 0.2510 0.1715 0.1663 0.3002 0.2522 0.1715 0.1665 0.3017 0.9614 0.5881 -0.9979 -0.4635 1.0062 0.6076 -1.0206 -0.4729 1.127010 1.100289 1.175422 1.124837 3694.2411 3674.4453 3684.1640 3663.4596 4.161950 4.303319 4.159870 4.301257 -8.918813 -8.912980 -8.915847 -8.909728 -0.01825 0.06230 -0.00720 0.06735 0.97965 0.52580 1.01340 0.54025 1.270152 1.236180 0.004214 0.004588 -8.915896 -8.912788 (1.198, 1.238] True (-9.044, -8.584] (-9.033, -8.571] 0.389231 0.431504 0.410367 953.56650 73.3448 956.383045 0.988237 0.152928 9 0.058776 0.008508 0.065457 -0.003184 0.003491 -0.001307
4 0 0 0 0 4634 4659 4569 4615 109.82575 1405.3212 0.3760 0.3919 0.2353 0.2783 0.3803 0.3949 0.2379 0.2831 0.2055 0.1655 -0.1817 -0.0868 0.2052 0.1698 -0.1773 -0.0987 0.263857 0.201368 0.266344 0.202921 3512.8911 3518.9333 3517.0462 3518.1529 4.338535 4.362407 4.374177 4.392268 -8.864162 -8.866028 -8.865445 -8.865787 0.01190 0.03935 0.01395 0.03555 0.19360 0.12615 0.19125 0.13425 0.069621 0.053395 0.001690 0.001458 -8.865095 -8.865616 (0.0399, 0.0799] True (-9.044, -8.584] (-9.033, -8.571] 0.384965 0.427336 0.406151 -1589.17425 -293.6788 1616.082311 0.933954 0.357394 10 0.041004 -0.025178 0.038188 -0.025734 -0.010226 -0.010452
8 0 0 0 0 5895 5967 5826 5909 1448.97510 2334.9766 0.0188 0.0227 0.0239 0.0182 0.0189 0.0229 0.0241 0.0183 -0.2900 0.9268 0.6441 -0.7562 -0.3066 0.9498 0.6584 -0.7697 0.971112 0.993329 0.998060 1.012881 44277.4560 44510.9310 44328.6760 44646.1230 4.390932 4.486290 4.404279 4.505218 -11.615457 -11.621167 -11.616712 -11.624459 0.17705 0.08530 0.17590 0.09005 -0.46705 0.84150 -0.48250 0.85975 0.943058 0.926258 0.038623 0.039050 -11.618312 -11.620586 (0.919, 0.959] True (-11.807, -11.347] (-11.801, -11.34] 0.948232 0.980210 0.964221 -250.02490 635.9766 683.358388 -0.732269 -0.681016 8 -0.183036 0.187739 -0.185731 0.190132 0.181022 0.183329
16 0 0 0 0 1916 1935 1936 1936 987.00513 2672.3523 0.0075 0.0098 0.0099 0.0079 0.0075 0.0098 0.0100 0.0079 -0.4763 0.8956 0.6350 -0.8553 -0.5089 0.9281 0.6544 -0.8827 1.014377 1.065253 1.058465 1.098817 91854.4360 93143.4830 91915.1220 93209.8860 4.097640 4.188533 4.106857 4.199608 -12.407750 -12.422881 -12.408467 -12.423655 0.07935 0.02015 0.07275 0.02270 -0.55565 0.87545 -0.58165 0.90540 1.028961 1.075160 0.006702 0.005808 -12.415316 -12.416061 (1.039, 1.078] True (-12.728, -12.268] (-12.724, -12.263] 1.644972 1.564745 1.604859 -711.99487 973.3523 1205.964923 -0.302869 -0.953032 9 -0.081726 0.043236 -0.076208 0.043668 0.069388 0.070080

Radial Bins groupings

In [42]:
df_radial_bins = df.groupby('bin').agg({'r': 'mean',
                                        'wt_mag': 'sum',
                                        'eT_mono_times_wt': 'sum',
                                        'eT_chro_times_wt': 'sum',
                                       })


df_radial_bins.columns = ['r_mean',
                          'wt_mag_sum',
                          'eT_mono_times_wt_sum',
                          'eT_chro_times_wt_sum']

df_radial_bins['eT_mean_mono'] = df_radial_bins.eval('eT_mono_times_wt_sum / wt_mag_sum')
df_radial_bins['eT_mean_chro'] = df_radial_bins.eval('eT_chro_times_wt_sum / wt_mag_sum')

df_radial_bins['bin_count'] = df['bin'].value_counts()

print('Statistics for different radial bins')
print(f'RMIN = {RMIN} and DLNR = {DLNR}')

df_radial_bins.style\
.background_gradient(subset=['eT_mean_mono','eT_mean_chro'],cmap='Blues')\
.apply(lambda x: ["background-color: #DAabaa" 
                          if (v < 0) 
                          else "" for i, v in enumerate(x)], axis = 1)
Statistics for different radial bins
RMIN = 10 and DLNR = 0.5
Out[42]:
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eT_mean_mono eT_mean_chro bin_count
bin
0 12.4392 2.61292 -0.253149 -0.231666 -0.0968834 -0.0886617 5
1 21.4642 2.94549 0.0637053 0.029765 0.0216281 0.0101053 5
2 35.149 4.52689 -1.09041 -1.06006 -0.240874 -0.234169 6
3 59.6951 29.2774 7.0226 7.25474 0.239864 0.247793 35
4 79.3204 6.63948 2.09627 2.16793 0.315728 0.326521 8
5 175.433 77.6305 37.3149 37.5623 0.480673 0.483861 79
6 271.249 681.734 270.331 272.876 0.396535 0.400268 707
7 445.262 2009.16 443.896 447.293 0.220936 0.222627 2001
8 734.109 4743.97 590.261 593.796 0.124424 0.125169 4765
9 1213.44 12233.2 835.862 842.925 0.0683273 0.0689046 12232
10 1742.09 11124.3 436.155 438.476 0.0392075 0.0394161 11073
In [43]:
# why some eT values are -ve?
"""
1. For given rmin and dlnr we have some bins very few object counts.

""";
In [44]:
pd.cut(df['eT_mono'],20).value_counts()
Out[44]:
(0.00144, 0.108]     14304
(0.108, 0.214]        6224
(-0.105, 0.00144]     3129
(0.214, 0.32]         2270
(0.32, 0.426]         1178
(-0.211, -0.105]       934
(0.426, 0.533]         643
(-0.317, -0.211]       508
(-0.424, -0.317]       418
(0.533, 0.639]         402
(-0.53, -0.424]        323
(-0.636, -0.53]        229
(0.639, 0.745]         169
(-0.742, -0.636]       111
(-0.848, -0.742]        30
(0.745, 0.851]          30
(-0.957, -0.848]         6
(0.851, 0.958]           6
(1.064, 1.17]            2
(0.958, 1.064]           0
Name: eT_mono, dtype: int64
In [45]:
fig,ax = plt.subplots(figsize=(12,8))
sns.distplot(df['eT_mono'],ax=ax)
plt.title('Histogram and density plot of eT_mono')
Out[45]:
Text(0.5, 1.0, 'Histogram and density plot of eT_mono')
In [46]:
df['wt_mag'].hist(bins=100,figsize=(12,8))
plt.xlim(0,3)
Out[46]:
(0, 3)
In [47]:
pd.options.display.max_columns=500
df.head(5)
Out[47]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins above bins_mag_mono bins_mag_chro wt_mag_mono wt_mag_chro wt_mag dx dy r cos2theta sin2theta bin eX_mono eT_mono eX_chro eT_chro eT_mono_times_wt eT_chro_times_wt
0 0 0 0 0 5301 5314 5231 5117 88.17075 1847.1934 0.0196 0.0249 0.0227 0.0216 0.0200 0.0256 0.0231 0.0220 -0.4253 0.1855 0.2730 -0.3021 -0.4257 0.1904 0.2778 -0.3155 0.463994 0.407177 0.466340 0.420373 79841.4700 82737.3540 80303.9230 83923.9080 5.186953 5.293858 5.267827 5.390682 -12.255571 -12.294254 -12.261842 -12.309714 -0.07615 -0.05830 -0.07395 -0.06255 -0.34915 0.24380 -0.35175 0.25295 0.215290 0.181344 0.009198 0.009381 -12.274912 -12.285778 (0.16, 0.2] True (-12.728, -12.268] (-12.724, -12.263] 1.456448 1.425518 1.440983 -1610.82925 148.1934 1617.631650 0.983215 -0.182452 10 -0.043428 0.064235 -0.048008 0.061296 0.092561 0.088327
2 0 0 0 0 1301 1310 1323 1312 2652.56650 1772.3448 0.2510 0.1715 0.1663 0.3002 0.2522 0.1715 0.1665 0.3017 0.9614 0.5881 -0.9979 -0.4635 1.0062 0.6076 -1.0206 -0.4729 1.127010 1.100289 1.175422 1.124837 3694.2411 3674.4453 3684.1640 3663.4596 4.161950 4.303319 4.159870 4.301257 -8.918813 -8.912980 -8.915847 -8.909728 -0.01825 0.06230 -0.00720 0.06735 0.97965 0.52580 1.01340 0.54025 1.270152 1.236180 0.004214 0.004588 -8.915896 -8.912788 (1.198, 1.238] True (-9.044, -8.584] (-9.033, -8.571] 0.389231 0.431504 0.410367 953.56650 73.3448 956.383045 0.988237 0.152928 9 0.058776 0.008508 0.065457 -0.003184 0.003491 -0.001307
4 0 0 0 0 4634 4659 4569 4615 109.82575 1405.3212 0.3760 0.3919 0.2353 0.2783 0.3803 0.3949 0.2379 0.2831 0.2055 0.1655 -0.1817 -0.0868 0.2052 0.1698 -0.1773 -0.0987 0.263857 0.201368 0.266344 0.202921 3512.8911 3518.9333 3517.0462 3518.1529 4.338535 4.362407 4.374177 4.392268 -8.864162 -8.866028 -8.865445 -8.865787 0.01190 0.03935 0.01395 0.03555 0.19360 0.12615 0.19125 0.13425 0.069621 0.053395 0.001690 0.001458 -8.865095 -8.865616 (0.0399, 0.0799] True (-9.044, -8.584] (-9.033, -8.571] 0.384965 0.427336 0.406151 -1589.17425 -293.6788 1616.082311 0.933954 0.357394 10 0.041004 -0.025178 0.038188 -0.025734 -0.010226 -0.010452
8 0 0 0 0 5895 5967 5826 5909 1448.97510 2334.9766 0.0188 0.0227 0.0239 0.0182 0.0189 0.0229 0.0241 0.0183 -0.2900 0.9268 0.6441 -0.7562 -0.3066 0.9498 0.6584 -0.7697 0.971112 0.993329 0.998060 1.012881 44277.4560 44510.9310 44328.6760 44646.1230 4.390932 4.486290 4.404279 4.505218 -11.615457 -11.621167 -11.616712 -11.624459 0.17705 0.08530 0.17590 0.09005 -0.46705 0.84150 -0.48250 0.85975 0.943058 0.926258 0.038623 0.039050 -11.618312 -11.620586 (0.919, 0.959] True (-11.807, -11.347] (-11.801, -11.34] 0.948232 0.980210 0.964221 -250.02490 635.9766 683.358388 -0.732269 -0.681016 8 -0.183036 0.187739 -0.185731 0.190132 0.181022 0.183329
16 0 0 0 0 1916 1935 1936 1936 987.00513 2672.3523 0.0075 0.0098 0.0099 0.0079 0.0075 0.0098 0.0100 0.0079 -0.4763 0.8956 0.6350 -0.8553 -0.5089 0.9281 0.6544 -0.8827 1.014377 1.065253 1.058465 1.098817 91854.4360 93143.4830 91915.1220 93209.8860 4.097640 4.188533 4.106857 4.199608 -12.407750 -12.422881 -12.408467 -12.423655 0.07935 0.02015 0.07275 0.02270 -0.55565 0.87545 -0.58165 0.90540 1.028961 1.075160 0.006702 0.005808 -12.415316 -12.416061 (1.039, 1.078] True (-12.728, -12.268] (-12.724, -12.263] 1.644972 1.564745 1.604859 -711.99487 973.3523 1205.964923 -0.302869 -0.953032 9 -0.081726 0.043236 -0.076208 0.043668 0.069388 0.070080
In [48]:
df.query(""" r>500 """)['eT_mono'].describe()
Out[48]:
count    28553.000000
mean         0.065416
std          0.170729
min         -0.954698
25%          0.019631
50%          0.068047
75%          0.126914
max          1.170045
Name: eT_mono, dtype: float64
In [49]:
df['gm_sq'].hist(bins=100,figsize=(12,8))
Out[49]:
<matplotlib.axes._subplots.AxesSubplot at 0x10f80fb70>

Plot of eT for mono and chro

In [50]:
def plot_eT_mean(rmin, dlnr,min_bin_count,title='',
                 show_mono_chro=False,
                 show_data=True):

    df['dx'] = df['x[0]'] - 1699 # jesisim fitsfiles have shape 3398, 3398
    df['dy'] = df['x[1]'] - 1699

    df['r'] = np.hypot(df['dx'], df['dy'])

    df['cos2theta'] = df.eval(' (dx * dx - dy * dy) / (r * r)' )
    df['sin2theta'] = df.eval(' (2  * dx * dy     ) / (r * r)' )

    df['bin'] = ( np.log(df.r / rmin) / dlnr).astype(int)

    df_radial_bins = df.groupby('bin').agg({'r': 'mean',
                                            'wt_mag': 'sum',
                                            'eT_mono_times_wt': 'sum',
                                            'eT_chro_times_wt': 'sum',

                                           })


    df_radial_bins.columns = ['r_mean',
                              'wt_mag_sum',
                              'eT_mono_times_wt_sum',
                              'eT_chro_times_wt_sum']

    df_radial_bins['eT_mean_mono'] = df_radial_bins.eval('eT_mono_times_wt_sum / wt_mag_sum')
    df_radial_bins['eT_mean_chro'] = df_radial_bins.eval('eT_chro_times_wt_sum / wt_mag_sum')
    
    # eT mono vs chro ratio
    df_radial_bins['eT_ratio'] = df_radial_bins['eT_mean_mono'] / df_radial_bins['eT_mean_chro']

    # bin counts
    df_radial_bins['bin_count'] = df['bin'].value_counts()

    # init figure
    fig, ax = plt.subplots(1,1,figsize=(12,8))
    
    # min bin count
    df_radial_bins = df_radial_bins.query(""" bin_count > @min_bin_count """)
    
    # plot ratio of eT mono vs chro
    df_radial_bins.plot.line(x='r_mean',y='eT_ratio',marker='o',color='b',ax=ax)
        
        
    # also plot eT mono and eT chro
    if show_mono_chro:
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_mono',marker='o',color='r', ax=ax)
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_chro',marker='o',color='b',ax=ax)
    

    
    # label
    plt.ylabel('eT')
    plt.title(title)
    plt.suptitle('Plot of eT',fontsize=24,weight='bold')
    
    # also show the dataframe
    if show_data:
        display(df_radial_bins.style.background_gradient(subset=['eT_mean_mono']))
    
    plt.show()
    

rmin = 300
dlnr = 0.5
min_bin_count = 10
title = f'rmin={rmin}, dlnr={dlnr}, min_bin_count={min_bin_count}'
plot_eT_mean(rmin=rmin, dlnr=dlnr,min_bin_count=min_bin_count,title=title)
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eT_mean_mono eT_mean_chro eT_ratio bin_count
bin
-3 54.8887 22.7525 4.89088 5.07628 0.21496 0.223109 0.963476 26
-2 74.3131 13.5755 4.30603 4.41053 0.317191 0.324888 0.976306 18
-1 163.969 47.6546 23.6056 23.9433 0.495347 0.502433 0.985897 46
0 366.792 2178.41 631.649 636.764 0.289958 0.292306 0.991968 2200
1 665.863 4018.37 555.619 558.62 0.13827 0.139017 0.994628 4022
2 1099.56 10059.2 785.128 791.574 0.0780511 0.0786919 0.991857 10089
3 1657.62 14373.1 613.784 618.027 0.0427036 0.0429987 0.993135 14324
4 2259.58 193.267 4.03504 4.0014 0.0208781 0.020704 1.00841 176

Interactive plots

In [51]:
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)
In [52]:
rmin = 400
dlnr = 0.4
min_bin = 20
siingle = False
In [53]:
def plotly_eT_plot(rmin=400, dlnr=0.4, min_bin=20,
                   show_mono_chro=False,
                   show_data=True,
                   title=f'rmin={rmin}, dlnr={dlnr}, min_bin={min_bin}'):
    df['dx'] = df['x[0]'] - 1699 # jesisim fitsfiles have shape 3398, 3398
    df['dy'] = df['x[1]'] - 1699

    df['r'] = np.hypot(df['dx'], df['dy'])

    df['cos2theta'] = df.eval(' (dx * dx - dy * dy) / (r * r)' )
    df['sin2theta'] = df.eval(' (2  * dx * dy     ) / (r * r)' )

    df['bin'] = ( np.log(df.r / rmin) / dlnr).astype(int)

    df_radial_bins = df.groupby('bin').agg({'r': 'mean',
                                            'wt_mag': 'sum',
                                            'eT_mono_times_wt': 'sum',
                                            'eT_chro_times_wt': 'sum',

                                           })

    df_radial_bins.columns = ['r_mean',
                              'wt_mag_sum',
                              'eT_mono_times_wt_sum',
                              'eT_chro_times_wt_sum']

    df_radial_bins['eT_mean_mono'] = df_radial_bins.eval('eT_mono_times_wt_sum / wt_mag_sum')
    df_radial_bins['eT_mean_chro'] = df_radial_bins.eval('eT_chro_times_wt_sum / wt_mag_sum')
    
    # ratio of eT mono and chro
    df_radial_bins['eT_ratio'] = df_radial_bins['eT_mean_mono'] / df_radial_bins['eT_mean_chro']

    df_radial_bins['bin_count'] = df['bin'].value_counts()
    
    # min bin count
    df_radial_bins = df_radial_bins.query(""" bin_count > @min_bin """)
    
    
    # eT mono vs chro ratio
    sc = go.Scatter( x=df_radial_bins['r_mean'],
                     y=df_radial_bins['eT_ratio'],
                     mode = 'lines+markers',
                     name = 'eT ratio')
    
    mydata = [sc]
    
    # also plot eT mono and chro
    if show_mono_chro:
        sc1 = go.Scatter(x=df_radial_bins['r_mean'],
                         y=df_radial_bins['eT_mean_mono'],
                         mode = 'lines+markers',
                         name = 'eT mono')

        sc2 = go.Scatter(x=df_radial_bins['r_mean'],
                         y=df_radial_bins['eT_mean_chro'],
                         mode = 'lines+markers',
                         name = 'eT chro')

        mydata= [sc1,sc2]
    
    # layout
    layout = go.Layout(
                    title=title,
                    xaxis=dict(
                               title='radius',
                               titlefont=dict(
                               family='Courier New, monospace',
                               size=18,
                               color='#7f7f7f')),
                     yaxis=dict(title='eT',
                                titlefont=dict(
                                          family='Courier New, monospace',
                                          size=18,
                                          color='#7f7f7f')))
    
    myfig = go.Figure(data=mydata, layout=layout)
    
    myfig.update_layout(showlegend=True)

    
    # also show the dataframe
    if show_data:
        display(df_radial_bins.style.background_gradient(subset=['eT_mean_mono']))

    # iplot plots in jupyter-notebook, plot opens in new tab.
    return iplot(myfig, filename='myplot.html')

# plot
rmin = 300
dlnr = 0.5
min_bin = 20
title = f'rmin={rmin}, dlnr={dlnr}, min_bin={min_bin}'
plotly_eT_plot(rmin=rmin, dlnr=dlnr,min_bin=min_bin,title=title)
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eT_mean_mono eT_mean_chro eT_ratio bin_count
bin
-3 54.8887 22.7525 4.89088 5.07628 0.21496 0.223109 0.963476 26
-1 163.969 47.6546 23.6056 23.9433 0.495347 0.502433 0.985897 46
0 366.792 2178.41 631.649 636.764 0.289958 0.292306 0.991968 2200
1 665.863 4018.37 555.619 558.62 0.13827 0.139017 0.994628 4022
2 1099.56 10059.2 785.128 791.574 0.0780511 0.0786919 0.991857 10089
3 1657.62 14373.1 613.784 618.027 0.0427036 0.0429987 0.993135 14324
4 2259.58 193.267 4.03504 4.0014 0.0208781 0.020704 1.00841 176
In [54]:
# plot
rmin = 400
dlnr = 0.5
min_bin = 20
title = f'rmin={rmin}, dlnr={dlnr}, min_bin={min_bin}'
plotly_eT_plot(rmin=rmin, dlnr=dlnr,min_bin=min_bin,title=title,show_mono_chro=True)
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eT_mean_mono eT_mean_chro eT_ratio bin_count
bin
-3 68.3757 27.6499 8.0128 8.34413 0.289795 0.301778 0.960292 32
-1 209.834 245.613 115.719 116.181 0.471145 0.473023 0.996029 244
0 477.506 3838.46 834.391 841.798 0.217376 0.219306 0.991201 3853
1 888.212 6742.81 667.805 671.013 0.0990397 0.0995155 0.995219 6781
2 1443.99 16252.9 878.074 886.162 0.0540256 0.0545233 0.990872 16221
3 1969.56 3783.33 114.119 113.91 0.0301637 0.0301084 1.00184 3752

ipywidgets

which pip
pip install ipywidgets
jupyter nbextension enable --py --sys-prefix widgetsnbextension
In [55]:
import ipywidgets as widgets
from ipywidgets import HBox, VBox
from ipywidgets import interactive
In [56]:
rmin_ = widgets.IntSlider(min=0, max=500, step=50, value=100)
dlnr_  = widgets.FloatSlider(min=0.1,max=0.8,step=0.05,value=0.5)
min_bin_count_ = widgets.IntSlider(min=10, max=600, step=20, value=50)
interactive_plot = interactive(plotly_eT_plot,
                               rmin=rmin_,
                               dlnr=dlnr_,
                               min_bin_count=min_bin_count_)

output = interactive_plot.children[-1]
interactive_plot

Time Taken

In [57]:
time_taken = time.time() - time_start_notebook
h,m = divmod(time_taken,60*60)
print('Time taken to run whole notebook: {:.0f} hr '\
      '{:.0f} min {:.0f} secs'.format(h, *divmod(m,60)))
Time taken to run whole notebook: 0 hr 0 min 42 secs
In [58]:
import subprocess
subprocess.call(['python', '-m', 'nbconvert', '*.ipynb'])
Out[58]:
0